{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import torch\n",
    "import gym\n",
    "import torch.nn as nn\n",
    "from collections import deque\n",
    "import torch.nn.functional as F\n",
    "from torchvision import transforms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "Hyperparameter initialization.\n",
    "\"\"\"\n",
    "\n",
    "# Maximum number of transitions kept in the replay memory\n",
    "CapacityReplayMemory = 10000\n",
    "# Exploration rate (epsilon) of the epsilon-greedy policy\n",
    "EPSILON = 0.1\n",
    "# Number of transitions drawn per training minibatch\n",
    "BATCH_SIZE = 32\n",
    "# Discount factor applied to the bootstrapped next-state value\n",
    "GAMMA = 0.99\n",
    "# Number of discrete actions (passed to train_DQN as n_action)\n",
    "N_ACTION = 4\n",
    "# Number of training episodes\n",
    "N_EPISODES = 10000\n",
    "# Maximum number of environment steps per episode\n",
    "T = 1000\n",
    "# Run on the GPU whenever CUDA is available\n",
    "USE_CUDA = torch.cuda.is_available()\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([3, 4])\n",
      "tensor([-0.0019, -0.0019, -0.0019], device='cuda:0', grad_fn=<MaxBackward0>)\n"
     ]
    }
   ],
   "source": [
    "class DQN(nn.Module):\n",
    "    \"\"\"Convolutional Q-network: a batch of frames in, one Q-value per action out.\"\"\"\n",
    "\n",
    "    def __init__(self, in_channels = 3, n_actions = 4):\n",
    "        \"\"\"\n",
    "        The default input is an 84 x 84 x 3 tensor, so the feature map after\n",
    "        the third convolution is 7 x 7 x 64.\n",
    "        Breakout is the default test game, hence in_channels = 3 and 4 actions.\n",
    "        \"\"\"\n",
    "        super().__init__()\n",
    "        # Convolutional feature extractor\n",
    "        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=8, stride=4)\n",
    "        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)\n",
    "        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)\n",
    "        self.flatten = nn.Flatten()\n",
    "        # Fully connected head producing the Q-values\n",
    "        self.fc1 = nn.Linear(7 * 7 * 64, 512)\n",
    "        self.fc2 = nn.Linear(512, n_actions)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return the Q-values, one row per input sample and one column per action.\"\"\"\n",
    "        features = F.relu(self.conv1(x))\n",
    "        features = F.relu(self.conv2(features))\n",
    "        features = F.relu(self.conv3(features))\n",
    "        hidden = F.relu(self.fc1(self.flatten(features)))\n",
    "        return self.fc2(hidden)\n",
    "\n",
    "# Smoke test: push a dummy batch of three 84 x 84 RGB frames through the\n",
    "# network and inspect the output. The device is picked at runtime so this\n",
    "# cell also runs on machines without CUDA.\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "tmp = torch.ones((3, 3, 84, 84), dtype=torch.float32, device=device)\n",
    "net = DQN().to(device)\n",
    "res = net(tmp)\n",
    "# max over dim 1 returns (values, indices): best Q-value and action per sample\n",
    "g = res.max(1)\n",
    "print(res.shape)\n",
    "print(g[0])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Module-level replay memory read by sampleFromReplayMemory; being a bounded\n",
    "# deque, it drops its oldest entries once CapacityReplayMemory items are stored.\n",
    "ReplayMemory = deque(maxlen=CapacityReplayMemory)\n",
    "def sampleFromReplayMemory(batch_size):\n",
    "    \"\"\"Draw up to batch_size distinct transitions from the module-level ReplayMemory.\n",
    "\n",
    "    Returns a list of five numpy arrays, one per transition field, in the\n",
    "    order the fields are stored in each memory entry. Fewer than batch_size\n",
    "    rows are returned when the memory holds fewer entries.\n",
    "    \"\"\"\n",
    "    shuffled = np.random.permutation(len(ReplayMemory))\n",
    "    chosen = [ReplayMemory[i] for i in shuffled[:batch_size]]\n",
    "    # One bucket per transition field; entries are split column-wise.\n",
    "    buckets = [[] for _ in range(5)]\n",
    "    for transition in chosen:\n",
    "        for bucket, field in zip(buckets, transition):\n",
    "            bucket.append(field)\n",
    "    return [np.array(bucket) for bucket in buckets]\n",
    "\n",
    "\n",
    "\n",
    "def epsilon_greedy(q_values, epsilon, n_action):\n",
    "    \"\"\"Choose an action with an epsilon-greedy policy.\n",
    "\n",
    "    Args:\n",
    "        q_values: torch tensor holding the Q-values of a single state; the\n",
    "            greedy action is the arg-max over the flattened tensor.\n",
    "        epsilon: probability of taking a uniformly random action.\n",
    "        n_action: number of discrete actions to sample from when exploring.\n",
    "\n",
    "    Returns:\n",
    "        The chosen action index as a plain Python int, for both branches.\n",
    "    \"\"\"\n",
    "    if np.random.uniform(0, 1) < 1 - epsilon:\n",
    "        # Exploit: torch.argmax works on any device and does not depend on how\n",
    "        # NumPy dispatches np.argmax over a torch tensor.\n",
    "        return int(torch.argmax(q_values.detach()).item())\n",
    "    # Explore: uniform over [0, n_action).\n",
    "    return int(np.random.randint(0, n_action))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Total reward of every finished training episode; train_DQN appends to this\n",
    "# list and periodically saves it to Reward_Episode.npy.\n",
    "Reward_Recorder = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def train_DQN(\n",
    "    env:gym.Env,\n",
    "    q_net:DQN,\n",
    "    batch_size,\n",
    "    gamma,\n",
    "    replayMemory:deque,\n",
    "    n_episodes,\n",
    "    epsilon,\n",
    "    n_action,\n",
    "    optimizer,\n",
    "    use_cuda = True,\n",
    "    T = 1000\n",
    "    ):\n",
    "    \"\"\"Train q_net with one-step Q-learning and experience replay.\n",
    "\n",
    "    After every environment step the raw transition is appended to\n",
    "    replayMemory, a minibatch is drawn with sampleFromReplayMemory and one\n",
    "    optimizer step is taken on the squared TD error. Episode rewards are\n",
    "    appended to the module-level Reward_Recorder; every 25 episodes the network\n",
    "    is saved to DQN.pth and the rewards to Reward_Episode.npy.\n",
    "\n",
    "    Args:\n",
    "        env: gym environment with image observations; reset() must return the\n",
    "            observation and step() a 4-tuple (obs, reward, done, info).\n",
    "        q_net: network mapping preprocessed frames to per-action Q-values.\n",
    "        batch_size: maximum number of transitions per update.\n",
    "        gamma: discount factor of the TD target.\n",
    "        replayMemory: deque receiving the new transitions. NOTE:\n",
    "            sampleFromReplayMemory reads the module-level ReplayMemory, so this\n",
    "            argument must be that same object for new transitions to be sampled.\n",
    "        n_episodes: number of episodes to run.\n",
    "        epsilon: exploration rate of the epsilon-greedy policy.\n",
    "        n_action: number of discrete actions.\n",
    "        optimizer: optimizer over the parameters of q_net.\n",
    "        use_cuda: move the network and all tensors to the GPU when True.\n",
    "        T: maximum number of steps per episode.\n",
    "    \"\"\"\n",
    "    best_reward = 0\n",
    "    # Preprocessing: resize to 110 x 84, take a random 84 x 84 crop, convert to\n",
    "    # a float tensor and normalize each channel.\n",
    "    transform = transforms.Compose([\n",
    "        transforms.ToPILImage(),\n",
    "        transforms.Resize((110, 84)),\n",
    "        transforms.RandomCrop((84, 84)),\n",
    "        transforms.ToTensor(),\n",
    "        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n",
    "    ])\n",
    "    device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n",
    "\n",
    "    def frames_to_batch(frames):\n",
    "        # Preprocess every raw frame and stack the results along a new batch\n",
    "        # dimension; a single frame yields a batch of one.\n",
    "        return torch.stack([transform(frame) for frame in frames]).to(device)\n",
    "\n",
    "    if use_cuda:\n",
    "        q_net.cuda()\n",
    "    for episode in range(n_episodes):\n",
    "        reward_container = 0.0\n",
    "        state_numpy = env.reset()\n",
    "        state = frames_to_batch([state_numpy])\n",
    "\n",
    "        for _ in range(T):\n",
    "            # Act epsilon-greedily; no gradient is needed to pick an action.\n",
    "            with torch.no_grad():\n",
    "                q_values = q_net(state)\n",
    "            action = epsilon_greedy(q_values, epsilon, n_action)\n",
    "            new_state_numpy, reward, done, info = env.step(action)\n",
    "            reward_container += reward\n",
    "\n",
    "            # Raw frames are stored, so every replay applies a fresh random crop.\n",
    "            replayMemory.append([state_numpy, action, reward, new_state_numpy, done])\n",
    "            state_batch_numpy, action_batch, reward_batch, newState_batch_numpy, done_batch = sampleFromReplayMemory(batch_size)\n",
    "\n",
    "            state_numpy = new_state_numpy\n",
    "            state = frames_to_batch([new_state_numpy])\n",
    "\n",
    "            state_batch = frames_to_batch(state_batch_numpy)\n",
    "            newState_batch = frames_to_batch(newState_batch_numpy)\n",
    "            action_batch = torch.from_numpy(action_batch).long().to(device)\n",
    "            reward_batch = torch.from_numpy(reward_batch).float().to(device)\n",
    "            # 1 for non-terminal transitions, 0 for terminal ones, so the\n",
    "            # bootstrap term vanishes at the end of an episode.\n",
    "            not_done_batch = torch.from_numpy(1 - done_batch).float().to(device)\n",
    "\n",
    "            # The TD target is a constant with respect to the parameters.\n",
    "            with torch.no_grad():\n",
    "                next_max_q = q_net(newState_batch).max(1)[0] * not_done_batch\n",
    "                y = reward_batch + gamma * next_max_q\n",
    "            # gather yields shape (B, 1); squeeze it to (B,) so prediction and\n",
    "            # target line up element-wise. Any other shape makes the loss\n",
    "            # broadcast every prediction against every target.\n",
    "            cur_q_values = q_net(state_batch).gather(1, action_batch.unsqueeze(1)).squeeze(1)\n",
    "            loss = F.mse_loss(cur_q_values, y)\n",
    "            optimizer.zero_grad()\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "\n",
    "            if done:\n",
    "                break\n",
    "\n",
    "        best_reward = max(best_reward, reward_container)\n",
    "        Reward_Recorder.append(reward_container)\n",
    "        if episode % 2 == 0:\n",
    "            print(\"Epoch {} Episode {}\".format(episode*T // 50000, episode))\n",
    "            print(\"    Episode reward:\", reward_container)\n",
    "            print(\"    Best reward:\", best_reward)\n",
    "        if episode % 25 == 0:\n",
    "            # Periodic checkpoint of the network and the reward history.\n",
    "            torch.save(q_net, \"DQN.pth\")\n",
    "            np.save(\"Reward_Episode.npy\", np.array(Reward_Recorder))\n",
    "\n",
    "\n",
    "def visualizer(env, q_net, n_episodes, T=1000, use_cuda=True, n_action=4):\n",
    "    \"\"\"Render rollouts of the greedy policy of q_net and close env afterwards.\n",
    "\n",
    "    Args:\n",
    "        env: gym environment with image observations; reset() must return the\n",
    "            observation and step() a 4-tuple (obs, reward, done, info).\n",
    "        q_net: network mapping preprocessed frames to per-action Q-values.\n",
    "        n_episodes: number of episodes to render.\n",
    "        T: maximum number of steps per episode.\n",
    "        use_cuda: move the network and the frames to the GPU when True.\n",
    "        n_action: number of discrete actions.\n",
    "    \"\"\"\n",
    "    # Same preprocessing as in train_DQN, but with a deterministic center crop\n",
    "    # instead of the random training crop. Frames must go through this\n",
    "    # pipeline: the network expects channels-first 84 x 84 input, and\n",
    "    # ndarray.transpose returns a new view rather than modifying in place.\n",
    "    transform = transforms.Compose([\n",
    "        transforms.ToPILImage(),\n",
    "        transforms.Resize((110, 84)),\n",
    "        transforms.CenterCrop((84, 84)),\n",
    "        transforms.ToTensor(),\n",
    "        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n",
    "    ])\n",
    "    device = torch.device(\"cuda\" if use_cuda else \"cpu\")\n",
    "\n",
    "    def preprocess(frame):\n",
    "        # Raw frame -> normalized batch of one on the target device.\n",
    "        return transform(frame).unsqueeze(0).to(device)\n",
    "\n",
    "    if use_cuda:\n",
    "        q_net.cuda()\n",
    "    for episode in range(n_episodes):\n",
    "        state = preprocess(env.reset())\n",
    "        for t in range(T):\n",
    "            env.render()\n",
    "            # Inference only, so no autograd graph is needed.\n",
    "            with torch.no_grad():\n",
    "                q_values = q_net(state)\n",
    "            # epsilon = 0 makes the policy purely greedy.\n",
    "            action = epsilon_greedy(q_values, 0, n_action)\n",
    "            newState, reward, done, info = env.step(action)\n",
    "            state = preprocess(newState)\n",
    "            if done:\n",
    "                print(\"Episode finished after {} timesteps\".format(t+1))\n",
    "                break\n",
    "    env.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main():\n",
    "    \"\"\"Build the Q-network, train it on Breakout-v0, save it and render 10 episodes.\"\"\"\n",
    "    # Size the output layer from the same constant that drives action selection.\n",
    "    q_net = DQN(n_actions=N_ACTION)\n",
    "    optimizer = torch.optim.Adam(q_net.parameters(), lr=0.00025)\n",
    "    env = gym.make(\"Breakout-v0\")\n",
    "    train_DQN(env, q_net, BATCH_SIZE, GAMMA, ReplayMemory, N_EPISODES, EPSILON, N_ACTION, optimizer, USE_CUDA, T)\n",
    "    torch.save(q_net, \"DQN.pth\")\n",
    "    # Forward the device and action-count settings explicitly; visualizer\n",
    "    # defaults to use_cuda=True, which fails on machines without CUDA.\n",
    "    visualizer(env, q_net, 10, 1000, use_cuda=USE_CUDA, n_action=N_ACTION)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0 Episode 0\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 0\n",
      "Epoch 0 Episode 2\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 2.0\n",
      "Epoch 0 Episode 4\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 2.0\n",
      "Epoch 0 Episode 6\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 2.0\n",
      "Epoch 0 Episode 8\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 2.0\n",
      "Epoch 0 Episode 10\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 2.0\n",
      "Epoch 0 Episode 12\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 3.0\n",
      "Epoch 0 Episode 14\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 3.0\n",
      "Epoch 0 Episode 16\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 3.0\n",
      "Epoch 0 Episode 18\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 3.0\n",
      "Epoch 0 Episode 20\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 3.0\n",
      "Epoch 0 Episode 22\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 3.0\n",
      "Epoch 0 Episode 24\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 3.0\n",
      "Epoch 0 Episode 26\n",
      "    Episode reward: 8.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 28\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 30\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 32\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 34\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 36\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 38\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 40\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 42\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 44\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 46\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 0 Episode 48\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 50\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 52\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 54\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 56\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 58\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 60\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 62\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 64\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 66\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 68\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 70\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 72\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 74\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 76\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 78\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 80\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 82\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 84\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 86\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 88\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 90\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 92\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 94\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 96\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 1 Episode 98\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 100\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 102\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 104\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 106\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 108\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 110\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 112\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 114\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 116\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 118\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 120\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 122\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 124\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 126\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 128\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 130\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 132\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 134\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 136\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 138\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 140\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 142\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 144\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 146\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 2 Episode 148\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 150\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 152\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 154\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 156\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 158\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 160\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 162\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 164\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 166\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 168\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 170\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 172\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 174\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 176\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 178\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 180\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 182\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 184\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 186\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 188\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 190\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 192\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 194\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 196\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 3 Episode 198\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 200\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 202\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 204\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 206\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 208\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 210\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 212\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 214\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 216\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 218\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 220\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 222\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 224\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 226\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 228\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 230\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 232\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 234\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 236\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 238\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 240\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 242\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 244\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 246\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 4 Episode 248\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 250\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 252\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 254\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 256\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 258\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 260\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 262\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 264\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 266\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 268\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 270\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 272\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 274\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 276\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 278\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 280\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 282\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 284\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 286\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 288\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 290\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 292\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 294\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 296\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 5 Episode 298\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 300\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 302\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 304\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 306\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 308\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 310\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 312\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 314\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 316\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 318\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 320\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 322\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 324\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 326\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 328\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 330\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 332\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 334\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 336\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 338\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 340\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 342\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 344\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 346\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 6 Episode 348\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 350\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 352\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 354\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 356\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 358\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 360\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 362\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 364\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 366\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 368\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 370\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 372\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 374\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 376\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 378\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 380\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 382\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 384\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 386\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 388\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 390\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 392\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 394\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 396\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 7 Episode 398\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 400\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 402\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 404\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 406\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 408\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 410\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 412\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 414\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 416\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 418\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 420\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 422\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 424\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 426\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 428\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 430\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 432\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 434\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 436\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 438\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 440\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 442\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 444\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 446\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 8 Episode 448\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 450\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 452\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 454\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 456\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 458\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 460\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 462\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 464\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 466\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 468\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 470\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 472\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 474\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 476\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 478\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 480\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 482\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 484\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 486\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 488\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 490\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 492\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 494\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 496\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 9 Episode 498\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 500\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 502\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 504\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 506\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 508\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 510\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 512\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 514\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 516\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 518\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 520\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 522\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 524\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 526\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 528\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 530\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 532\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 534\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 536\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 538\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 540\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 542\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 544\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 546\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 10 Episode 548\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 550\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 552\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 554\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 556\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 558\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 560\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 562\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 564\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 566\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 568\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 570\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 572\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 574\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 576\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 578\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 580\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 582\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 584\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 586\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 588\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 590\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 592\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 594\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 596\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 11 Episode 598\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 600\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 602\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 604\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 606\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 608\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 610\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 612\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 614\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 616\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 618\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 620\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 622\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 624\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 626\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 628\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 630\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 632\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 634\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 636\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 638\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 640\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 642\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 644\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 646\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 12 Episode 648\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 650\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 652\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 654\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 656\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 658\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 660\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 662\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 664\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 666\n",
      "    Episode reward: 6.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 668\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 670\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 672\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 674\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 676\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 678\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 680\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 682\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 684\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 686\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 688\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 690\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 692\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 694\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 696\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 8.0\n",
      "Epoch 13 Episode 698\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 14 Episode 700\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 8.0\n",
      "Epoch 14 Episode 702\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 14 Episode 704\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 8.0\n",
      "Epoch 14 Episode 706\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 708\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 710\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 712\n",
      "    Episode reward: 6.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 714\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 716\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 718\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 720\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 722\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 724\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 726\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 728\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 730\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 732\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 734\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 736\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 738\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 740\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 742\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 744\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 746\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 14 Episode 748\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 750\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 752\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 754\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 756\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 758\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 760\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 762\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 764\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 766\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 768\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 770\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 772\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 774\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 776\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 778\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 780\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 782\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 784\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 786\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 788\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 790\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 792\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 794\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 796\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 15 Episode 798\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 800\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 802\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 804\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 806\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 808\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 810\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 812\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 814\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 816\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 818\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 820\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 822\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 824\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 826\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 828\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 830\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 832\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 834\n",
      "    Episode reward: 6.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 836\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 838\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 840\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 842\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 844\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 846\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 16 Episode 848\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 850\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 852\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 854\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 856\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 858\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 860\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 862\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 864\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 866\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 868\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 870\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 872\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 874\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 876\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 878\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 880\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 882\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 884\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 886\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 888\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 890\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 892\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 894\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 896\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 17 Episode 898\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 900\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 902\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 904\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 906\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 908\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 910\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 912\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 914\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 916\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 918\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 920\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 922\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 924\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 926\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 928\n",
      "    Episode reward: 6.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 930\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 932\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 934\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 936\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 938\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 940\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 942\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 944\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 946\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 18 Episode 948\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 950\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 952\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 954\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 956\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 958\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 960\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 962\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 964\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 966\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 968\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 970\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 972\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 974\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 976\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 978\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 980\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 982\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 984\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 986\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 988\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 990\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 992\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 994\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 996\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 19 Episode 998\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1000\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1002\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1004\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1006\n",
      "    Episode reward: 7.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1008\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1010\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1012\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1014\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1016\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1018\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1020\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1022\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1024\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1026\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1028\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1030\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1032\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1034\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1036\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1038\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1040\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1042\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1044\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1046\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 20 Episode 1048\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1050\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1052\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1054\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1056\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1058\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1060\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1062\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1064\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1066\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1068\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1070\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1072\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1074\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1076\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1078\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1080\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1082\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1084\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1086\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1088\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1090\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1092\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1094\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1096\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 21 Episode 1098\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1100\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1102\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1104\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1106\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1108\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1110\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1112\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1114\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1116\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1118\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1120\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1122\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1124\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1126\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1128\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1130\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1132\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1134\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1136\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1138\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1140\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1142\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1144\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1146\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 22 Episode 1148\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1150\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1152\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1154\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1156\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1158\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1160\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1162\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1164\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1166\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1168\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1170\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1172\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1174\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1176\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1178\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1180\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1182\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1184\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1186\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1188\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1190\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1192\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1194\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1196\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 23 Episode 1198\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1200\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1202\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1204\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1206\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1208\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1210\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1212\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1214\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1216\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1218\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1220\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1222\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1224\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1226\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1228\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1230\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1232\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1234\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1236\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1238\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1240\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1242\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1244\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1246\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 24 Episode 1248\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1250\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1252\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1254\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1256\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1258\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1260\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1262\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1264\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1266\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1268\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1270\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1272\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1274\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1276\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1278\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1280\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1282\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1284\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1286\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1288\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1290\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1292\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1294\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1296\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 25 Episode 1298\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1300\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1302\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1304\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1306\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1308\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1310\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1312\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1314\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1316\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1318\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1320\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1322\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1324\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1326\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1328\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1330\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1332\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1334\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1336\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1338\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1340\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1342\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1344\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1346\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 26 Episode 1348\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1350\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1352\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1354\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1356\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1358\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1360\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1362\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1364\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1366\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1368\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1370\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1372\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1374\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1376\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1378\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1380\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1382\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1384\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1386\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1388\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1390\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1392\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1394\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1396\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 27 Episode 1398\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1400\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1402\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1404\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1406\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1408\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1410\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1412\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1414\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1416\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1418\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1420\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1422\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1424\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1426\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1428\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1430\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1432\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1434\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1436\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1438\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1440\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1442\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1444\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1446\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 28 Episode 1448\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1450\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1452\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1454\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1456\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1458\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1460\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1462\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1464\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1466\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1468\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1470\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1472\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1474\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1476\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1478\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1480\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1482\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1484\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1486\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1488\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1490\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1492\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1494\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1496\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 29 Episode 1498\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1500\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1502\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1504\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1506\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1508\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1510\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1512\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1514\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1516\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1518\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1520\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1522\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1524\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1526\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1528\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1530\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1532\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1534\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1536\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1538\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1540\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1542\n",
      "    Episode reward: 6.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1544\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1546\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 30 Episode 1548\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1550\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1552\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1554\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1556\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1558\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1560\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1562\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1564\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1566\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1568\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1570\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1572\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1574\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1576\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1578\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1580\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1582\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1584\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1586\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1588\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1590\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1592\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1594\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1596\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 31 Episode 1598\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1600\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1602\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1604\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1606\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1608\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1610\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1612\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1614\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1616\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1618\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1620\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1622\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1624\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1626\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1628\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1630\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1632\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1634\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1636\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1638\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1640\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1642\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1644\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1646\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 32 Episode 1648\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1650\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1652\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1654\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1656\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1658\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1660\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1662\n",
      "    Episode reward: 8.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1664\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1666\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1668\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1670\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1672\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1674\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1676\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1678\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1680\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1682\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1684\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1686\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1688\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1690\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1692\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1694\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1696\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 33 Episode 1698\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1700\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1702\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1704\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1706\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1708\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1710\n",
      "    Episode reward: 5.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1712\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1714\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1716\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1718\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1720\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1722\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1724\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1726\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1728\n",
      "    Episode reward: 4.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1730\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1732\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1734\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1736\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1738\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1740\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1742\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1744\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1746\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 34 Episode 1748\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1750\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1752\n",
      "    Episode reward: 6.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1754\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1756\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1758\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1760\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1762\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1764\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1766\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1768\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1770\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1772\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1774\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1776\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1778\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1780\n",
      "    Episode reward: 0.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1782\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1784\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1786\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1788\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1790\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1792\n",
      "    Episode reward: 1.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1794\n",
      "    Episode reward: 3.0\n",
      "    Best reward: 9.0\n",
      "Epoch 35 Episode 1796\n",
      "    Episode reward: 2.0\n",
      "    Best reward: 9.0\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-149-263240bbee7e>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-148-0128f23d5f64>\u001b[0m in \u001b[0;36mmain\u001b[1;34m()\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0moptimizer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptim\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mAdam\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mq_net\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparameters\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlr\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.00025\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m     \u001b[0menv\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgym\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmake\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Breakout-v0\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m     \u001b[0mtrain_DQN\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mq_net\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mBATCH_SIZE\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mGAMMA\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mReplayMemory\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mN_EPISODES\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mEPSILON\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mN_ACTION\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mUSE_CUDA\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mT\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      6\u001b[0m     \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mq_net\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"DQN.pth\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      7\u001b[0m     \u001b[0mvisualizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0menv\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mq_net\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-147-70454076d252>\u001b[0m in \u001b[0;36mtrain_DQN\u001b[1;34m(env, q_net, batch_size, gamma, replayMemory, n_episodes, epsilon, n_action, optimizer, use_cuda, T)\u001b[0m\n\u001b[0;32m     62\u001b[0m                 \u001b[0mnewState_batch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnewState_batch_numpy\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munsqueeze\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     63\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 64\u001b[1;33m                 \u001b[0mnewState_batch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnewState_batch_numpy\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnum\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     65\u001b[0m             \u001b[1;31m# newState_batch = torch.from_numpy(newState_batch) / 255.0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     66\u001b[0m             \u001b[0mnot_done_batch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_numpy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mdone_batch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-147-70454076d252>\u001b[0m in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m     62\u001b[0m                 \u001b[0mnewState_batch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnewState_batch_numpy\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munsqueeze\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     63\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 64\u001b[1;33m                 \u001b[0mnewState_batch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnewState_batch_numpy\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnum\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     65\u001b[0m             \u001b[1;31m# newState_batch = torch.from_numpy(newState_batch) / 255.0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     66\u001b[0m             \u001b[0mnot_done_batch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfrom_numpy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mdone_batch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\torchvision\\transforms\\transforms.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, img)\u001b[0m\n\u001b[0;32m     58\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mimg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     59\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransforms\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 60\u001b[1;33m             \u001b[0mimg\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     61\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mimg\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     62\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m   1049\u001b[0m         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[0;32m   1050\u001b[0m                 or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[1;32m-> 1051\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1052\u001b[0m         \u001b[1;31m# Do not call functions when jit is used\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1053\u001b[0m         \u001b[0mfull_backward_hooks\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\torchvision\\transforms\\transforms.py\u001b[0m in \u001b[0;36mforward\u001b[1;34m(self, img)\u001b[0m\n\u001b[0;32m    295\u001b[0m             \u001b[0mPIL\u001b[0m \u001b[0mImage\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mTensor\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mRescaled\u001b[0m \u001b[0mimage\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    296\u001b[0m         \"\"\"\n\u001b[1;32m--> 297\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minterpolation\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmax_size\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mantialias\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    298\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    299\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0m__repr__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\torchvision\\transforms\\functional.py\u001b[0m in \u001b[0;36mresize\u001b[1;34m(img, size, interpolation, max_size, antialias)\u001b[0m\n\u001b[0;32m    399\u001b[0m             )\n\u001b[0;32m    400\u001b[0m         \u001b[0mpil_interpolation\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpil_modes_mapping\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0minterpolation\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 401\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mF_pil\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minterpolation\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpil_interpolation\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmax_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_size\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    402\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    403\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mF_t\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mimg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minterpolation\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minterpolation\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmax_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_size\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mantialias\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mantialias\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\torchvision\\transforms\\functional_pil.py\u001b[0m in \u001b[0;36mresize\u001b[1;34m(img, size, interpolation, max_size)\u001b[0m\n\u001b[0;32m    239\u001b[0m                 \u001b[1;34m\"i.e. size should be an int or a sequence of length 1 in torchscript mode.\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    240\u001b[0m             )\n\u001b[1;32m--> 241\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mimg\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minterpolation\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    242\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    243\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\PIL\\Image.py\u001b[0m in \u001b[0;36mresize\u001b[1;34m(self, size, resample, box, reducing_gap)\u001b[0m\n\u001b[0;32m   1920\u001b[0m                 )\n\u001b[0;32m   1921\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1922\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_new\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mim\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mresize\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresample\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbox\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1923\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1924\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfactor\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbox\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "main()"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "494899efd6527d56ea7f55c588d0081523a17dc3a9ff1107f3394ad815ff2527"
  },
  "kernelspec": {
   "display_name": "Python 3.7.7 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
